Google Sheets

Reading data with the googlesheets4 package

First, set up Google credentials. This doesn’t currently work with AnVIL.

library(googlesheets4)
# Prompts a browser pop-up
gs4_auth()
# Once set up, you can automate this process by passing your email
gs4_auth(email = "avamariehoffman@gmail.com")

Reading data with the googlesheets4 package

You can also supply an authorization token directly, but make sure to add any files to your .gitignore!

library(googledrive)
drive_auth(email= "<email>",
           token = readRDS("google-sheets-token.rds")) # Saved in a file

Reading data with the googlesheets4 package

Read in using read_sheet()

sheet_url <- 
  "https://docs.google.com/spreadsheets/d/1U6Cf_qEOhiR9AZqTqS3mbMF3zt2db48ZP5v3rkrAEJY/edit#gid=780868077"
sheet_dat_1 <- read_sheet(sheet_url)
Auto-refreshing stale OAuth token.
✔ Reading from "gapminder".
✔ Range 'Africa'.
head(sheet_dat_1)
# A tibble: 6 × 6
  country continent  year lifeExp      pop gdpPercap
  <chr>   <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Algeria Africa     1952    43.1  9279525     2449.
2 Algeria Africa     1957    45.7 10270856     3014.
3 Algeria Africa     1962    48.3 11000948     2551.
4 Algeria Africa     1967    51.4 12760499     3247.
5 Algeria Africa     1972    54.5 14760787     4183.
6 Algeria Africa     1977    58.0 17152804     4910.

Reading data with the googlesheets4 package

Specify the sheet name if necessary:

sheet_dat_oceania <- read_sheet(sheet_url, sheet = "Oceania")
✔ Reading from "gapminder".
✔ Range ''Oceania''.
head(sheet_dat_oceania)
# A tibble: 6 × 6
  country   continent  year lifeExp      pop gdpPercap
  <chr>     <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Australia Oceania    1952    69.1  8691212    10040.
2 Australia Oceania    1957    70.3  9712569    10950.
3 Australia Oceania    1962    70.9 10794968    12217.
4 Australia Oceania    1967    71.1 11872264    14526.
5 Australia Oceania    1972    71.9 13177000    16789.
6 Australia Oceania    1977    73.5 14074100    18334.

Pull in a subset of data: rows

read_sheet(sheet_url, sheet = "Oceania", range =  cell_rows(1:4))
✔ Reading from "gapminder".
✔ Range ''Oceania'!1:4'.
# A tibble: 3 × 6
  country   continent  year lifeExp      pop gdpPercap
  <chr>     <chr>     <dbl>   <dbl>    <dbl>     <dbl>
1 Australia Oceania    1952    69.1  8691212    10040.
2 Australia Oceania    1957    70.3  9712569    10950.
3 Australia Oceania    1962    70.9 10794968    12217.

Pull in a subset of data: columns

read_sheet(sheet_url, sheet = "Oceania", range =  cell_cols("A:B"))
✔ Reading from "gapminder".
✔ Range ''Oceania'!A:B'.
# A tibble: 24 × 2
   country   continent
   <chr>     <chr>    
 1 Australia Oceania  
 2 Australia Oceania  
 3 Australia Oceania  
 4 Australia Oceania  
 5 Australia Oceania  
 6 Australia Oceania  
 7 Australia Oceania  
 8 Australia Oceania  
 9 Australia Oceania  
10 Australia Oceania  
# … with 14 more rows

Reading data with the googlesheets4 package

List out the sheet names using sheet_names().

sheet_names(sheet_url)
[1] "Africa"   "Americas" "Asia"     "Europe"   "Oceania" 

Reading data with the googlesheets4 package

Iterate through the sheet names:

gapminder_sheets <- sheet_names(sheet_url)

data_list <- list()
for(g_sheet in gapminder_sheets){
  data_list[[g_sheet]] = read_sheet(sheet_url, sheet = g_sheet)
}
✔ Reading from "gapminder".
✔ Range ''Africa''.
✔ Reading from "gapminder".
✔ Range ''Americas''.
✔ Reading from "gapminder".
✔ Range ''Asia''.
✔ Reading from "gapminder".
✔ Range ''Europe''.
✔ Reading from "gapminder".
✔ Range ''Oceania''.

Reading data with the googlesheets4 package

Iterate through the sheet names:

str(data_list)
List of 5
 $ Africa  : tibble [624 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:624] "Algeria" "Algeria" "Algeria" "Algeria" ...
  ..$ continent: chr [1:624] "Africa" "Africa" "Africa" "Africa" ...
  ..$ year     : num [1:624] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:624] 43.1 45.7 48.3 51.4 54.5 ...
  ..$ pop      : num [1:624] 9279525 10270856 11000948 12760499 14760787 ...
  ..$ gdpPercap: num [1:624] 2449 3014 2551 3247 4183 ...
 $ Americas: tibble [300 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:300] "Argentina" "Argentina" "Argentina" "Argentina" ...
  ..$ continent: chr [1:300] "Americas" "Americas" "Americas" "Americas" ...
  ..$ year     : num [1:300] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:300] 62.5 64.4 65.1 65.6 67.1 ...
  ..$ pop      : num [1:300] 17876956 19610538 21283783 22934225 24779799 ...
  ..$ gdpPercap: num [1:300] 5911 6857 7133 8053 9443 ...
 $ Asia    : tibble [396 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:396] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
  ..$ continent: chr [1:396] "Asia" "Asia" "Asia" "Asia" ...
  ..$ year     : num [1:396] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:396] 28.8 30.3 32 34 36.1 ...
  ..$ pop      : num [1:396] 8425333 9240934 10267083 11537966 13079460 ...
  ..$ gdpPercap: num [1:396] 779 821 853 836 740 ...
 $ Europe  : tibble [360 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:360] "Albania" "Albania" "Albania" "Albania" ...
  ..$ continent: chr [1:360] "Europe" "Europe" "Europe" "Europe" ...
  ..$ year     : num [1:360] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:360] 55.2 59.3 64.8 66.2 67.7 ...
  ..$ pop      : num [1:360] 1282697 1476505 1728137 1984060 2263554 ...
  ..$ gdpPercap: num [1:360] 1601 1942 2313 2760 3313 ...
 $ Oceania : tibble [24 × 6] (S3: tbl_df/tbl/data.frame)
  ..$ country  : chr [1:24] "Australia" "Australia" "Australia" "Australia" ...
  ..$ continent: chr [1:24] "Oceania" "Oceania" "Oceania" "Oceania" ...
  ..$ year     : num [1:24] 1952 1957 1962 1967 1972 ...
  ..$ lifeExp  : num [1:24] 69.1 70.3 70.9 71.1 71.9 ...
  ..$ pop      : num [1:24] 8691212 9712569 10794968 11872264 13177000 ...
  ..$ gdpPercap: num [1:24] 10040 10950 12217 14526 16789 ...

Reading data with the googlesheets4 package

Pull out sheets as needed:

data_list[[{sheet}]]
# OR
data_list${sheet}

.

Writing data with the googlesheets4 package

sheet_dat_oceania <- data_list$Oceania

sheet_dat_oceania <- sheet_dat_oceania %>%
  mutate(lifeExp_days = lifeExp * 365)

sheet_out <- gs4_create("Oceania-days", 
                       sheets = list(Oceania_days = sheet_dat_oceania))
✔ Creating new Sheet: "Oceania-days".
# Opens a browser window
gs4_browse(sheet_out)

Append data with the googlesheets4 package

sheet_append(sheet_out, data = sheet_dat_oceania, sheet = "Oceania_days")
✔ Writing to "Oceania-days".
✔ Appending 24 rows to 'Oceania_days'.

JHU Tidyverse Book

googlesheets Lab
http://sisbid.github.io/Data-Wrangling/labs/advanced-io-lab.Rmd

JSON: JavaScript Object Notation
Lists of stuff

Why JSON matters

#install.packages("jsonlite")
library(jsonlite)
jsonData <-
  fromJSON("https://raw.githubusercontent.com/Biuni/PokemonGO-Pokedex/master/pokedex.json")
head(jsonData)
$pokemon
   id num               name                                              img           type height   weight                    candy
1   1 001          Bulbasaur http://www.serebii.net/pokemongo/pokemon/001.png  Grass, Poison 0.71 m   6.9 kg          Bulbasaur Candy
2   2 002            Ivysaur http://www.serebii.net/pokemongo/pokemon/002.png  Grass, Poison 0.99 m  13.0 kg          Bulbasaur Candy
3   3 003           Venusaur http://www.serebii.net/pokemongo/pokemon/003.png  Grass, Poison 2.01 m 100.0 kg          Bulbasaur Candy
4   4 004         Charmander http://www.serebii.net/pokemongo/pokemon/004.png           Fire 0.61 m   8.5 kg         Charmander Candy
5   5 005         Charmeleon http://www.serebii.net/pokemongo/pokemon/005.png           Fire 1.09 m  19.0 kg         Charmander Candy
6   6 006          Charizard http://www.serebii.net/pokemongo/pokemon/006.png   Fire, Flying 1.70 m  90.5 kg         Charmander Candy
7   7 007           Squirtle http://www.serebii.net/pokemongo/pokemon/007.png          Water 0.51 m   9.0 kg           Squirtle Candy
8   8 008          Wartortle http://www.serebii.net/pokemongo/pokemon/008.png          Water 0.99 m  22.5 kg           Squirtle Candy
9   9 009          Blastoise http://www.serebii.net/pokemongo/pokemon/009.png          Water 1.60 m  85.5 kg           Squirtle Candy
10 10 010           Caterpie http://www.serebii.net/pokemongo/pokemon/010.png            Bug 0.30 m   2.9 kg           Caterpie Candy
11 11 011            Metapod http://www.serebii.net/pokemongo/pokemon/011.png            Bug 0.71 m   9.9 kg           Caterpie Candy
12 12 012         Butterfree http://www.serebii.net/pokemongo/pokemon/012.png    Bug, Flying 1.09 m  32.0 kg           Caterpie Candy
13 13 013             Weedle http://www.serebii.net/pokemongo/pokemon/013.png    Bug, Poison 0.30 m   3.2 kg             Weedle Candy
14 14 014             Kakuna http://www.serebii.net/pokemongo/pokemon/014.png    Bug, Poison 0.61 m  10.0 kg             Weedle Candy
15 15 015           Beedrill http://www.serebii.net/pokemongo/pokemon/015.png    Bug, Poison 0.99 m  29.5 kg             Weedle Candy
16 16 016             Pidgey http://www.serebii.net/pokemongo/pokemon/016.png Normal, Flying 0.30 m   1.8 kg             Pidgey Candy
17 17 017          Pidgeotto http://www.serebii.net/pokemongo/pokemon/017.png Normal, Flying 1.09 m  30.0 kg             Pidgey Candy
18 18 018            Pidgeot http://www.serebii.net/pokemongo/pokemon/018.png Normal, Flying 1.50 m  39.5 kg             Pidgey Candy
19 19 019            Rattata http://www.serebii.net/pokemongo/pokemon/019.png         Normal 0.30 m   3.5 kg            Rattata Candy
20 20 020           Raticate http://www.serebii.net/pokemongo/pokemon/020.png         Normal 0.71 m  18.5 kg            Rattata Candy
21 21 021            Spearow http://www.serebii.net/pokemongo/pokemon/021.png Normal, Flying 0.30 m   2.0 kg            Spearow Candy
22 22 022             Fearow http://www.serebii.net/pokemongo/pokemon/022.png Normal, Flying 1.19 m  38.0 kg            Spearow Candy
23 23 023              Ekans http://www.serebii.net/pokemongo/pokemon/023.png         Poison 2.01 m   6.9 kg              Ekans Candy
24 24 024              Arbok http://www.serebii.net/pokemongo/pokemon/024.png         Poison 3.51 m  65.0 kg              Ekans Candy
25 25 025            Pikachu http://www.serebii.net/pokemongo/pokemon/025.png       Electric 0.41 m   6.0 kg            Pikachu Candy
26 26 026             Raichu http://www.serebii.net/pokemongo/pokemon/026.png       Electric 0.79 m  30.0 kg            Pikachu Candy
27 27 027          Sandshrew http://www.serebii.net/pokemongo/pokemon/027.png         Ground 0.61 m  12.0 kg          Sandshrew Candy
28 28 028          Sandslash http://www.serebii.net/pokemongo/pokemon/028.png         Ground 0.99 m  29.5 kg          Sandshrew Candy
29 29 029 Nidoran ♀ (Female) http://www.serebii.net/pokemongo/pokemon/029.png         Poison 0.41 m   7.0 kg Nidoran ♀ (Female) Candy
30 30 030           Nidorina http://www.serebii.net/pokemongo/pokemon/030.png         Poison 0.79 m  20.0 kg Nidoran ♀ (Female) Candy
31 31 031          Nidoqueen http://www.serebii.net/pokemongo/pokemon/031.png Poison, Ground 1.30 m  60.0 kg Nidoran ♀ (Female) Candy
32 32 032   Nidoran ♂ (Male) http://www.serebii.net/pokemongo/pokemon/032.png         Poison 0.51 m   9.0 kg   Nidoran ♂ (Male) Candy
33 33 033           Nidorino http://www.serebii.net/pokemongo/pokemon/033.png         Poison 0.89 m  19.5 kg   Nidoran ♂ (Male) Candy
34 34 034           Nidoking http://www.serebii.net/pokemongo/pokemon/034.png Poison, Ground 1.40 m  62.0 kg   Nidoran ♂ (Male) Candy
35 35 035           Clefairy http://www.serebii.net/pokemongo/pokemon/035.png         Normal 0.61 m   7.5 kg           Clefairy Candy
36 36 036           Clefable http://www.serebii.net/pokemongo/pokemon/036.png         Normal 1.30 m  40.0 kg           Clefairy Candy
37 37 037             Vulpix http://www.serebii.net/pokemongo/pokemon/037.png           Fire 0.61 m   9.9 kg             Vulpix Candy
38 38 038          Ninetales http://www.serebii.net/pokemongo/pokemon/038.png           Fire 1.09 m  19.9 kg             Vulpix Candy
39 39 039         Jigglypuff http://www.serebii.net/pokemongo/pokemon/039.png         Normal 0.51 m   5.5 kg         Jigglypuff Candy
40 40 040         Wigglytuff http://www.serebii.net/pokemongo/pokemon/040.png         Normal 0.99 m  12.0 kg         Jigglypuff Candy
41 41 041              Zubat http://www.serebii.net/pokemongo/pokemon/041.png Poison, Flying 0.79 m   7.5 kg              Zubat Candy
42 42 042             Golbat http://www.serebii.net/pokemongo/pokemon/042.png Poison, Flying 1.60 m  55.0 kg              Zubat Candy
43 43 043             Oddish http://www.serebii.net/pokemongo/pokemon/043.png  Grass, Poison 0.51 m   5.4 kg             Oddish Candy
44 44 044              Gloom http://www.serebii.net/pokemongo/pokemon/044.png  Grass, Poison 0.79 m   8.6 kg             Oddish Candy
45 45 045          Vileplume http://www.serebii.net/pokemongo/pokemon/045.png  Grass, Poison 1.19 m  18.6 kg             Oddish Candy
46 46 046              Paras http://www.serebii.net/pokemongo/pokemon/046.png     Bug, Grass 0.30 m   5.4 kg              Paras Candy
47 47 047           Parasect http://www.serebii.net/pokemongo/pokemon/047.png     Bug, Grass 0.99 m  29.5 kg              Paras Candy
48 48 048            Venonat http://www.serebii.net/pokemongo/pokemon/048.png    Bug, Poison 0.99 m  30.0 kg            Venonat Candy
49 49 049           Venomoth http://www.serebii.net/pokemongo/pokemon/049.png    Bug, Poison 1.50 m  12.5 kg            Venonat Candy
50 50 050            Diglett http://www.serebii.net/pokemongo/pokemon/050.png         Ground 0.20 m   0.8 kg            Diglett Candy
51 51 051            Dugtrio http://www.serebii.net/pokemongo/pokemon/051.png         Ground 0.71 m  33.3 kg                  Dugtrio
52 52 052             Meowth http://www.serebii.net/pokemongo/pokemon/052.png         Normal 0.41 m   4.2 kg             Meowth Candy
53 53 053            Persian http://www.serebii.net/pokemongo/pokemon/053.png         Normal 0.99 m  32.0 kg             Meowth Candy
54 54 054            Psyduck http://www.serebii.net/pokemongo/pokemon/054.png          Water 0.79 m  19.6 kg            Psyduck Candy
55 55 055            Golduck http://www.serebii.net/pokemongo/pokemon/055.png          Water 1.70 m  76.6 kg            Psyduck Candy
56 56 056             Mankey http://www.serebii.net/pokemongo/pokemon/056.png       Fighting 0.51 m  28.0 kg             Mankey Candy
57 57 057           Primeape http://www.serebii.net/pokemongo/pokemon/057.png       Fighting 0.99 m  32.0 kg             Mankey Candy
58 58 058          Growlithe http://www.serebii.net/pokemongo/pokemon/058.png           Fire 0.71 m  19.0 kg          Growlithe Candy
   candy_count         egg spawn_chance avg_spawns spawn_time multipliers                           weaknesses
1           25        2 km       0.6900     69.000      20:00        1.58           Fire, Ice, Flying, Psychic
2          100 Not in Eggs       0.0420      4.200      07:00    1.2, 1.6           Fire, Ice, Flying, Psychic
3           NA Not in Eggs       0.0170      1.700      11:30        NULL           Fire, Ice, Flying, Psychic
4           25        2 km       0.2530     25.300      08:45        1.65                  Water, Ground, Rock
5          100 Not in Eggs       0.0120      1.200      19:00        1.79                  Water, Ground, Rock
6           NA Not in Eggs       0.0031      0.310      13:34        NULL                Water, Electric, Rock
7           25        2 km       0.5800     58.000      04:25         2.1                      Electric, Grass
8          100 Not in Eggs       0.0340      3.400      07:02         1.4                      Electric, Grass
9           NA Not in Eggs       0.0067      0.670      00:06        NULL                      Electric, Grass
10          12        2 km       3.0320    303.200      16:35        1.05                   Fire, Flying, Rock
11          50 Not in Eggs       0.1870     18.700      02:11  3.55, 3.79                   Fire, Flying, Rock
12          NA Not in Eggs       0.0220      2.200      05:23        NULL    Fire, Electric, Ice, Flying, Rock
13          12        2 km       7.1200    712.000      02:21  1.01, 1.09          Fire, Flying, Psychic, Rock
14          50 Not in Eggs       0.4400     44.000      02:30  3.01, 3.41          Fire, Flying, Psychic, Rock
15          NA Not in Eggs       0.0510      5.100      04:50        NULL          Fire, Flying, Psychic, Rock
16          12        2 km      15.9800      1.598      01:34  1.71, 1.92                       Electric, Rock
17          50 Not in Eggs       1.0200    102.000      01:30        1.79                       Electric, Rock
18          NA Not in Eggs       0.1300     13.000      01:50        NULL                       Electric, Rock
19          25        2 km      13.0500      1.305      01:55  2.55, 2.73                             Fighting
20          NA Not in Eggs       0.4100     41.000      01:56        NULL                             Fighting
21          50        2 km       4.7300    473.000      12:25  2.66, 2.68                       Electric, Rock
22          NA Not in Eggs       0.1500     15.000      01:11        NULL                       Electric, Rock
23          50        5 km       2.2700    227.000      12:20  2.21, 2.27                      Ground, Psychic
24          NA Not in Eggs       0.0720      7.200      01:50        NULL                      Ground, Psychic
25          50        2 km       0.2100     21.000      04:00        2.34                               Ground
26          NA Not in Eggs       0.0076      0.760      23:58        NULL                               Ground
27          50        5 km       1.1100    111.000      01:58        2.45                    Water, Grass, Ice
28          NA Not in Eggs       0.0370      3.700      12:34        NULL                    Water, Grass, Ice
29          25        5 km       1.3800    138.000      01:51  1.63, 2.48                      Ground, Psychic
30         100 Not in Eggs       0.0880      8.800      07:22  1.83, 2.48                      Ground, Psychic
31          NA Not in Eggs       0.0120      1.200      12:35        NULL          Water, Ice, Ground, Psychic
32          25        5 km       1.3100    131.000      01:12  1.64, 1.70                      Ground, Psychic
33         100 Not in Eggs       0.0830      8.300      09:02        1.83                      Ground, Psychic
34          NA Not in Eggs       0.0170      1.700      12:16        NULL          Water, Ice, Ground, Psychic
35          50        2 km       0.9200     92.000      03:30  2.03, 2.14                             Fighting
36          NA Not in Eggs       0.0120      1.200      03:29        NULL                             Fighting
37          50        5 km       0.2200     22.000      13:43  2.74, 2.81                  Water, Ground, Rock
38          NA Not in Eggs       0.0077      0.770      01:32        NULL                  Water, Ground, Rock
39          50        2 km       0.3900     39.000      08:46        1.85                             Fighting
40          NA Not in Eggs       0.0180      1.800      12:28        NULL                             Fighting
41          50        2 km       6.5200    652.000      12:28  2.60, 3.67         Electric, Ice, Psychic, Rock
42          NA Not in Eggs       0.4200     42.000      02:15        NULL         Electric, Ice, Psychic, Rock
43          25        5 km       1.0200    102.000      03:58         1.5           Fire, Ice, Flying, Psychic
44         100 Not in Eggs       0.0640      6.400      11:33        1.49           Fire, Ice, Flying, Psychic
45          NA Not in Eggs       0.0097      0.970      23:58        NULL           Fire, Ice, Flying, Psychic
46          50        5 km       2.3600    236.000      01:42        2.02 Fire, Ice, Poison, Flying, Bug, Rock
47          NA Not in Eggs       0.0740      7.400      01:22        NULL Fire, Ice, Poison, Flying, Bug, Rock
48          50        5 km       2.2800    228.000      02:31  1.86, 1.90          Fire, Flying, Psychic, Rock
49          NA Not in Eggs       0.0720      7.200      23:40        NULL          Fire, Flying, Psychic, Rock
50          50        5 km       0.4000     40.000      02:22        2.69                    Water, Grass, Ice
51          NA Not in Eggs       0.0140      1.400      12:37        NULL                    Water, Grass, Ice
52          50        5 km       0.8600     86.000      02:54        1.98                             Fighting
53          NA Not in Eggs       0.0220      2.200      02:44        NULL                             Fighting
54          50        5 km       2.5400    254.000      03:41        2.27                      Electric, Grass
55          NA Not in Eggs       0.0870      8.700      23:06        NULL                      Electric, Grass
56          50        5 km       0.9200     92.000      12:52  2.17, 2.28               Flying, Psychic, Fairy
57          NA Not in Eggs       0.0310      3.100      12:33        NULL               Flying, Psychic, Fairy
58          50        5 km       0.9200     92.000      03:57  2.31, 2.36                  Water, Ground, Rock
                    next_evolution                      prev_evolution
1      002, 003, Ivysaur, Venusaur                                NULL
2                    003, Venusaur                      001, Bulbasaur
3                             NULL        001, 002, Bulbasaur, Ivysaur
4  005, 006, Charmeleon, Charizard                                NULL
5                   006, Charizard                     004, Charmander
6                             NULL    004, 005, Charmander, Charmeleon
7   008, 009, Wartortle, Blastoise                                NULL
8                   009, Blastoise                       007, Squirtle
9                             NULL       007, 008, Squirtle, Wartortle
10   011, 012, Metapod, Butterfree                                NULL
11                 012, Butterfree                       010, Caterpie
12                            NULL         010, 011, Caterpie, Metapod
13      014, 015, Kakuna, Beedrill                                NULL
14                   015, Beedrill                         013, Weedle
15                            NULL            013, 014, Weedle, Kakuna
16    017, 018, Pidgeotto, Pidgeot                                NULL
17                    018, Pidgeot                         016, Pidgey
18                            NULL         016, 017, Pidgey, Pidgeotto
19                   020, Raticate                                NULL
20                            NULL                        019, Rattata
21                     022, Fearow                                NULL
22                            NULL                        021, Spearow
23                      024, Arbok                                NULL
24                            NULL                          023, Ekans
25                     026, Raichu                                NULL
26                            NULL                        025, Pikachu
27                  028, Sandslash                                NULL
28                            NULL                      027, Sandshrew
29   030, 031, Nidorina, Nidoqueen                                NULL
30                  031, Nidoqueen                029, Nidoran(Female)
31                            NULL 029, 030, Nidoran(Female), Nidorina
32    033, 034, Nidorino, Nidoking                                NULL
33                   034, Nidoking                  032, Nidoran(Male)
34                            NULL   032, 033, Nidoran(Male), Nidorino
35                   036, Clefable                                NULL
36                            NULL                       035, Clefairy
37                  038, Ninetales                                NULL
38                            NULL                         037, Vulpix
39                 040, Wigglytuff                                NULL
40                            NULL                     039, Jigglypuff
41                     042, Golbat                                NULL
42                            NULL                          041, Zubat
43      044, 045, Gloom, Vileplume                                NULL
44                  045, Vileplume                         043, Oddish
45                            NULL             043, 044, Oddish, Gloom
46                   047, Parasect                                NULL
47                            NULL                          046, Paras
48                   049, Venomoth                                NULL
49                            NULL                        048, Venonat
50                    051, Dugtrio                                NULL
51                            NULL                        050, Diglett
52                    053, Persian                                NULL
53                            NULL                         052, Meowth
54                    055, Golduck                                NULL
55                            NULL                        054, Psyduck
56                   057, Primeape                                NULL
57                            NULL                         056, Mankey
58                   059, Arcanine                                NULL
 [ reached 'max' / getOption("max.print") -- omitted 93 rows ]

Data frame structure from JSON

dim(jsonData$pokemon)
[1] 151  17
class(jsonData$pokemon)
[1] "data.frame"
jsonData$pokemon %>% filter(type == "Fire") %>% select(!(img))
    id num       name type height   weight            candy candy_count         egg spawn_chance avg_spawns spawn_time multipliers
1    4 004 Charmander Fire 0.61 m   8.5 kg Charmander Candy          25        2 km       0.2530      25.30      08:45        1.65
2    5 005 Charmeleon Fire 1.09 m  19.0 kg Charmander Candy         100 Not in Eggs       0.0120       1.20      19:00        1.79
3   37 037     Vulpix Fire 0.61 m   9.9 kg     Vulpix Candy          50        5 km       0.2200      22.00      13:43  2.74, 2.81
4   38 038  Ninetales Fire 1.09 m  19.9 kg     Vulpix Candy          NA Not in Eggs       0.0077       0.77      01:32        NULL
5   58 058  Growlithe Fire 0.71 m  19.0 kg  Growlithe Candy          50        5 km       0.9200      92.00      03:57  2.31, 2.36
6   59 059   Arcanine Fire 1.91 m 155.0 kg  Growlithe Candy          NA Not in Eggs       0.0170       1.70      03:11        NULL
7   77 077     Ponyta Fire 0.99 m  30.0 kg     Ponyta Candy          50        5 km       0.5100      51.00      02:50  1.48, 1.50
8   78 078   Rapidash Fire 1.70 m  95.0 kg     Ponyta Candy          NA Not in Eggs       0.0110       1.10      04:00        NULL
9  126 126     Magmar Fire 1.30 m  44.5 kg             None          NA       10 km       0.1000      10.00      20:36        NULL
10 136 136    Flareon Fire 0.89 m  25.0 kg      Eevee Candy          NA Not in Eggs       0.0170       1.70      07:02        NULL
            weaknesses                  next_evolution  prev_evolution
1  Water, Ground, Rock 005, 006, Charmeleon, Charizard            NULL
2  Water, Ground, Rock                  006, Charizard 004, Charmander
3  Water, Ground, Rock                  038, Ninetales            NULL
4  Water, Ground, Rock                            NULL     037, Vulpix
5  Water, Ground, Rock                   059, Arcanine            NULL
6  Water, Ground, Rock                            NULL  058, Growlithe
7  Water, Ground, Rock                   078, Rapidash            NULL
8  Water, Ground, Rock                            NULL     077, Ponyta
9  Water, Ground, Rock                            NULL            NULL
10 Water, Ground, Rock                            NULL      133, Eevee

Going deeper..

class(jsonData$pokemon$type) # Can be lists
[1] "list"
jsonData$pokemon$type
[[1]]
[1] "Grass"  "Poison"

[[2]]
[1] "Grass"  "Poison"

[[3]]
[1] "Grass"  "Poison"

[[4]]
[1] "Fire"

[[5]]
[1] "Fire"

[[6]]
[1] "Fire"   "Flying"

[[7]]
[1] "Water"

[[8]]
[1] "Water"

[[9]]
[1] "Water"

[[10]]
[1] "Bug"

[[11]]
[1] "Bug"

[[12]]
[1] "Bug"    "Flying"

[[13]]
[1] "Bug"    "Poison"

[[14]]
[1] "Bug"    "Poison"

[[15]]
[1] "Bug"    "Poison"

[[16]]
[1] "Normal" "Flying"

[[17]]
[1] "Normal" "Flying"

[[18]]
[1] "Normal" "Flying"

[[19]]
[1] "Normal"

[[20]]
[1] "Normal"

[[21]]
[1] "Normal" "Flying"

[[22]]
[1] "Normal" "Flying"

[[23]]
[1] "Poison"

[[24]]
[1] "Poison"

[[25]]
[1] "Electric"

[[26]]
[1] "Electric"

[[27]]
[1] "Ground"

[[28]]
[1] "Ground"

[[29]]
[1] "Poison"

[[30]]
[1] "Poison"

[[31]]
[1] "Poison" "Ground"

[[32]]
[1] "Poison"

[[33]]
[1] "Poison"

[[34]]
[1] "Poison" "Ground"

[[35]]
[1] "Normal"

[[36]]
[1] "Normal"

[[37]]
[1] "Fire"

[[38]]
[1] "Fire"

[[39]]
[1] "Normal"

[[40]]
[1] "Normal"

[[41]]
[1] "Poison" "Flying"

[[42]]
[1] "Poison" "Flying"

[[43]]
[1] "Grass"  "Poison"

[[44]]
[1] "Grass"  "Poison"

[[45]]
[1] "Grass"  "Poison"

[[46]]
[1] "Bug"   "Grass"

[[47]]
[1] "Bug"   "Grass"

[[48]]
[1] "Bug"    "Poison"

[[49]]
[1] "Bug"    "Poison"

[[50]]
[1] "Ground"

[[51]]
[1] "Ground"

[[52]]
[1] "Normal"

[[53]]
[1] "Normal"

[[54]]
[1] "Water"

[[55]]
[1] "Water"

[[56]]
[1] "Fighting"

[[57]]
[1] "Fighting"

[[58]]
[1] "Fire"

[[59]]
[1] "Fire"

[[60]]
[1] "Water"

[[61]]
[1] "Water"

[[62]]
[1] "Water"    "Fighting"

[[63]]
[1] "Psychic"

[[64]]
[1] "Psychic"

[[65]]
[1] "Psychic"

[[66]]
[1] "Fighting"

[[67]]
[1] "Fighting"

[[68]]
[1] "Fighting"

[[69]]
[1] "Grass"  "Poison"

[[70]]
[1] "Grass"  "Poison"

[[71]]
[1] "Grass"  "Poison"

[[72]]
[1] "Water"  "Poison"

[[73]]
[1] "Water"  "Poison"

[[74]]
[1] "Rock"   "Ground"

[[75]]
[1] "Rock"   "Ground"

[[76]]
[1] "Rock"   "Ground"

[[77]]
[1] "Fire"

[[78]]
[1] "Fire"

[[79]]
[1] "Water"   "Psychic"

[[80]]
[1] "Water"   "Psychic"

[[81]]
[1] "Electric"

[[82]]
[1] "Electric"

[[83]]
[1] "Normal" "Flying"

[[84]]
[1] "Normal" "Flying"

[[85]]
[1] "Normal" "Flying"

[[86]]
[1] "Water"

[[87]]
[1] "Water" "Ice"  

[[88]]
[1] "Poison"

[[89]]
[1] "Poison"

[[90]]
[1] "Water"

[[91]]
[1] "Water" "Ice"  

[[92]]
[1] "Ghost"  "Poison"

[[93]]
[1] "Ghost"  "Poison"

[[94]]
[1] "Ghost"  "Poison"

[[95]]
[1] "Rock"   "Ground"

[[96]]
[1] "Psychic"

[[97]]
[1] "Psychic"

[[98]]
[1] "Water"

[[99]]
[1] "Water"

[[100]]
[1] "Electric"

[[101]]
[1] "Electric"

[[102]]
[1] "Grass"   "Psychic"

[[103]]
[1] "Grass"   "Psychic"

[[104]]
[1] "Ground"

[[105]]
[1] "Ground"

[[106]]
[1] "Fighting"

[[107]]
[1] "Fighting"

[[108]]
[1] "Normal"

[[109]]
[1] "Poison"

[[110]]
[1] "Poison"

[[111]]
[1] "Ground" "Rock"  

[[112]]
[1] "Ground" "Rock"  

[[113]]
[1] "Normal"

[[114]]
[1] "Grass"

[[115]]
[1] "Normal"

[[116]]
[1] "Water"

[[117]]
[1] "Water"

[[118]]
[1] "Water"

[[119]]
[1] "Water"

[[120]]
[1] "Water"

[[121]]
[1] "Water"   "Psychic"

[[122]]
[1] "Psychic"

[[123]]
[1] "Bug"    "Flying"

[[124]]
[1] "Ice"     "Psychic"

[[125]]
[1] "Electric"

[[126]]
[1] "Fire"

[[127]]
[1] "Bug"

[[128]]
[1] "Normal"

[[129]]
[1] "Water"

[[130]]
[1] "Water"  "Flying"

[[131]]
[1] "Water" "Ice"  

[[132]]
[1] "Normal"

[[133]]
[1] "Normal"

[[134]]
[1] "Water"

[[135]]
[1] "Electric"

[[136]]
[1] "Fire"

[[137]]
[1] "Normal"

[[138]]
[1] "Rock"  "Water"

[[139]]
[1] "Rock"  "Water"

[[140]]
[1] "Rock"  "Water"

[[141]]
[1] "Rock"  "Water"

[[142]]
[1] "Rock"   "Flying"

[[143]]
[1] "Normal"

[[144]]
[1] "Ice"    "Flying"

[[145]]
[1] "Electric" "Flying"  

[[146]]
[1] "Fire"   "Flying"

[[147]]
[1] "Dragon"

[[148]]
[1] "Dragon"

[[149]]
[1] "Dragon" "Flying"

[[150]]
[1] "Psychic"

[[151]]
[1] "Psychic"

Data frame structure from JSON

class(jsonData$pokemon$next_evolution[[1]]) # Or lists of data.frames!
[1] "data.frame"
jsonData$pokemon$next_evolution
[[1]]
  num     name
1 002  Ivysaur
2 003 Venusaur

[[2]]
  num     name
1 003 Venusaur

[[3]]
NULL

[[4]]
  num       name
1 005 Charmeleon
2 006  Charizard

[[5]]
  num      name
1 006 Charizard

[[6]]
NULL

[[7]]
  num      name
1 008 Wartortle
2 009 Blastoise

[[8]]
  num      name
1 009 Blastoise

[[9]]
NULL

[[10]]
  num       name
1 011    Metapod
2 012 Butterfree

[[11]]
  num       name
1 012 Butterfree

[[12]]
NULL

[[13]]
  num     name
1 014   Kakuna
2 015 Beedrill

[[14]]
  num     name
1 015 Beedrill

[[15]]
NULL

[[16]]
  num      name
1 017 Pidgeotto
2 018   Pidgeot

[[17]]
  num    name
1 018 Pidgeot

[[18]]
NULL

[[19]]
  num     name
1 020 Raticate

[[20]]
NULL

[[21]]
  num   name
1 022 Fearow

[[22]]
NULL

[[23]]
  num  name
1 024 Arbok

[[24]]
NULL

[[25]]
  num   name
1 026 Raichu

[[26]]
NULL

[[27]]
  num      name
1 028 Sandslash

[[28]]
NULL

[[29]]
  num      name
1 030  Nidorina
2 031 Nidoqueen

[[30]]
  num      name
1 031 Nidoqueen

[[31]]
NULL

[[32]]
  num     name
1 033 Nidorino
2 034 Nidoking

[[33]]
  num     name
1 034 Nidoking

[[34]]
NULL

[[35]]
  num     name
1 036 Clefable

[[36]]
NULL

[[37]]
  num      name
1 038 Ninetales

[[38]]
NULL

[[39]]
  num       name
1 040 Wigglytuff

[[40]]
NULL

[[41]]
  num   name
1 042 Golbat

[[42]]
NULL

[[43]]
  num      name
1 044     Gloom
2 045 Vileplume

[[44]]
  num      name
1 045 Vileplume

[[45]]
NULL

[[46]]
  num     name
1 047 Parasect

[[47]]
NULL

[[48]]
  num     name
1 049 Venomoth

[[49]]
NULL

[[50]]
  num    name
1 051 Dugtrio

[[51]]
NULL

[[52]]
  num    name
1 053 Persian

[[53]]
NULL

[[54]]
  num    name
1 055 Golduck

[[55]]
NULL

[[56]]
  num     name
1 057 Primeape

[[57]]
NULL

[[58]]
  num     name
1 059 Arcanine

[[59]]
NULL

[[60]]
  num      name
1 061 Poliwhirl
2 062 Poliwrath

[[61]]
  num      name
1 062 Poliwrath

[[62]]
NULL

[[63]]
  num     name
1 064  Kadabra
2 065 Alakazam

[[64]]
  num     name
1 065 Alakazam

[[65]]
NULL

[[66]]
  num    name
1 067 Machoke
2 068 Machamp

[[67]]
  num    name
1 068 Machamp

[[68]]
NULL

[[69]]
  num       name
1 070 Weepinbell
2 071 Victreebel

[[70]]
  num       name
1 071 Victreebel

[[71]]
NULL

[[72]]
  num       name
1 073 Tentacruel

[[73]]
NULL

[[74]]
  num     name
1 075 Graveler
2 076    Golem

[[75]]
  num  name
1 076 Golem

[[76]]
NULL

[[77]]
  num     name
1 078 Rapidash

[[78]]
NULL

[[79]]
  num    name
1 080 Slowbro

[[80]]
NULL

[[81]]
  num     name
1 082 Magneton

[[82]]
NULL

[[83]]
NULL

[[84]]
  num   name
1 085 Dodrio

[[85]]
NULL

[[86]]
  num    name
1 087 Dewgong

[[87]]
NULL

[[88]]
  num name
1 089  Muk

[[89]]
NULL

[[90]]
  num     name
1 091 Cloyster

[[91]]
NULL

[[92]]
  num    name
1 093 Haunter
2 094  Gengar

[[93]]
  num   name
1 094 Gengar

[[94]]
NULL

[[95]]
NULL

[[96]]
  num  name
1 097 Hypno

[[97]]
NULL

[[98]]
  num    name
1 099 Kingler

[[99]]
NULL

[[100]]
  num      name
1 101 Electrode

[[101]]
NULL

[[102]]
  num      name
1 103 Exeggutor

[[103]]
NULL

[[104]]
  num    name
1 105 Marowak

[[105]]
NULL

[[106]]
NULL

[[107]]
NULL

[[108]]
NULL

[[109]]
  num    name
1 110 Weezing

[[110]]
NULL

[[111]]
  num   name
1 112 Rhydon

[[112]]
NULL

[[113]]
NULL

[[114]]
NULL

[[115]]
NULL

[[116]]
  num   name
1 117 Seadra

[[117]]
NULL

[[118]]
  num    name
1 119 Seaking

[[119]]
NULL

[[120]]
  num    name
1 121 Starmie

[[121]]
NULL

[[122]]
NULL

[[123]]
NULL

[[124]]
NULL

[[125]]
NULL

[[126]]
NULL

[[127]]
NULL

[[128]]
NULL

[[129]]
  num     name
1 130 Gyarados

[[130]]
NULL

[[131]]
NULL

[[132]]
NULL

[[133]]
  num     name
1 134 Vaporeon
2 135  Jolteon
3 136  Flareon

[[134]]
NULL

[[135]]
NULL

[[136]]
NULL

[[137]]
NULL

[[138]]
  num    name
1 139 Omastar

[[139]]
NULL

[[140]]
  num     name
1 141 Kabutops

[[141]]
NULL

[[142]]
NULL

[[143]]
NULL

[[144]]
NULL

[[145]]
NULL

[[146]]
NULL

[[147]]
  num      name
1 148 Dragonair
2 149 Dragonite

[[148]]
  num      name
1 149 Dragonite

[[149]]
NULL

[[150]]
NULL

[[151]]
NULL

JSON Lab
http://sisbid.github.io/Data-Wrangling/labs/advanced-io-lab.Rmd

Web Scraping

This is data

View the source

What the computer sees

Ways to see the source

Inspect element

Copy XPath

Use SelectorGadget

rvest package

recount_url <- "http://bowtie-bio.sourceforge.net/recount/"
# install.packages("rvest")
library(rvest)
htmlfile = read_html(recount_url)

nds <- html_nodes(htmlfile, xpath = '//*[@id="recounttab"]/table')
dat <- html_table(nds)
dat <- as.data.frame(dat)
head(dat)
       X1                                         X2      X3                               X4                               X5
1   Study                                       PMID Species  Number of biological replicates Number of uniquely aligned reads
2 bodymap not published, but publicly available here   human                               19                    2,197,622,796
3  cheung                                   20856902   human                               41                      834,584,950
4    core                                   19056941   human                                2                        8,670,342
5   gilad                                   20009012   human                                6                       41,356,738
6    maqc                                   20167110   human 14 (technical)**  2 (biological)                       71,970,164
               X6              X7              X8                                              X9
1   ExpressionSet     Count table Phenotype table                                           Notes
2            link            link            link Illumina Human BodyMap 2.0 -- tissue comparison
3            link            link            link                                    HapMap - CEU
4            link            link            link                                lung fibroblasts
5            link            link            link                        liver; males and femlaes
6 original pooled original pooled original pooled                              experiment: MAQC-2

Little cleanup

colnames(dat) <- as.character(dat[1,])
dat <- dat[-1,]
head(dat)
       Study                                       PMID Species  Number of biological replicates Number of uniquely aligned reads
2    bodymap not published, but publicly available here   human                               19                    2,197,622,796
3     cheung                                   20856902   human                               41                      834,584,950
4       core                                   19056941   human                                2                        8,670,342
5      gilad                                   20009012   human                                6                       41,356,738
6       maqc                                   20167110   human 14 (technical)**  2 (biological)                       71,970,164
7 montgomery                                   20220756   human                               60                     *886,468,054
    ExpressionSet     Count table Phenotype table                                           Notes
2            link            link            link Illumina Human BodyMap 2.0 -- tissue comparison
3            link            link            link                                    HapMap - CEU
4            link            link            link                                lung fibroblasts
5            link            link            link                        liver; males and femlaes
6 original pooled original pooled original pooled                              experiment: MAQC-2
7            link            link            link                                    HapMap - CEU

Ethics and Web Scraping

Ethics and Web Scraping

Ethics and Web Scraping

Ethics and Web Scraping

APIs

Application Programming Interfaces

In biology too!

Step 0: Did someone do this already

Step 0: Did someone do this already

Step 1: DIY

https://github.com/ThatCopy/catAPI/wiki/Usage

#install.packages("httr")
library(httr)

# Requests a random cat fact
query_url <- "https://thatcopy.pw/catapi/rest/"

req <- GET(query_url)
content(req)
{html_document}
<html class="no-js" lang="en-US">
[1] <head>\n<meta http-equiv="refresh" content="0">\n<title>thatcopy.pw | 523: Origin is unreachable</title>\n<meta charset="UTF-8"> ...
[2] <body>\n<div id="cf-wrapper">\n\n    \n\n    <div id="cf-error-details" class="p-0">\n        <header class="mx-auto pt-10 lg:pt ...

Not all APIs are “open”

Not all APIs are “open”